10  Obj. 2 - Model contribution

Purpose: quantify the effects of distance, catchment area, groundwater index, and connectivity on the proportional contribution of tributaries streams to the mainstem Snake River population of Yellowstone cutthroat trout.

10.1 Data

10.1.1 Mixing proportions

Load GSI/rubias output: bootstrapped mixing proportional contribution of reporting groups (tributaries) to the mainstem Snake River.

Code
gsi_dat <- readRDS("/Users/jeffbaldock/Library/CloudStorage/GoogleDrive-jbaldock@uwyo.edu/Shared drives/wyo-coop-baldock/UWyoming/Snake River Cutthroat/Analyses/Snake River GSI Quarto/GSI Analysis/By Section and Year/UpperSnake_GSI_SectionYear_output.RDS")

10.1.2 Covariate data

Groundwater, distance, basin size, and connnectivity

Code
# groundwater metrics
gwmet <- read_csv("/Users/jeffbaldock/Library/CloudStorage/GoogleDrive-jbaldock@uwyo.edu/Shared drives/wyo-coop-baldock/UWyoming/Snake River Cutthroat/Analyses/Snake River GSI Quarto/Landscape Covariates/Groundwater/GroundwaterMetrics_raw_RepUnits.csv") %>% 
  rename(repunit = site) %>% 
  mutate(logarea = log(areasqkm), loggwi = log(gwi_iew05km)) %>% 
  mutate(z_logarea = as.numeric(scale(logarea)), z_loggwi = as.numeric(scale(loggwi))) %>%
  arrange(repunit)
# gwmet$repunit <- str_replace_all(gwmet$repunit, "deadman _greys", "deadman_greys")

# flowline distances
dist <- read_csv("/Users/jeffbaldock/Library/CloudStorage/GoogleDrive-jbaldock@uwyo.edu/Shared drives/wyo-coop-baldock/UWyoming/Snake River Cutthroat/Analyses/Snake River GSI Quarto/Landscape Covariates/Flowline Distance/SnakeRiverSections_RepUnits_FlowlineDistance.csv") %>%
  mutate(z_dist = as.numeric(scale(distkm)))

# barriers
barr <- read_csv("/Users/jeffbaldock/Library/CloudStorage/GoogleDrive-jbaldock@uwyo.edu/Shared drives/wyo-coop-baldock/UWyoming/Snake River Cutthroat/Analyses/Snake River GSI Quarto/Landscape Covariates/Barriers/RepUnits_BarrierSummary.csv") %>%
  mutate(log_barrier_dens = log(barrier_dens + 0.0001),
         z_barrier_dens = as.numeric(scale(barrier_dens)),
         z_barrier_num = as.numeric(scale(numbarr)),
         z_log_barrier_dens = as.numeric(scale(log_barrier_dens)),
         connect_type = as.factor(ifelse(is.na(connect_type), "connected", connect_type))) %>%
  mutate(connect_type = recode(connect_type, "culvert" = "culvert_diversion", "diversion dam" = "culvert_diversion"))

# Landcover
land <- read_csv("/Users/jeffbaldock/Library/CloudStorage/GoogleDrive-jbaldock@uwyo.edu/Shared drives/wyo-coop-baldock/UWyoming/Snake River Cutthroat/Analyses/Snake River GSI Quarto/Landscape Covariates/Landcover/RepUnits_LandcoverSummary.csv") %>%
  mutate(log_prop_ag = log(prop_ag + 0.0001),
         log_prop_dev = log(prop_dev + 0.0001),
         log_prop_agdev = log(prop_agdev + 0.0001),
         z_prop_ag = as.numeric(scale(prop_ag)),
         z_prop_dev = as.numeric(scale(prop_dev)),
         z_prop_agdev = as.numeric(scale(prop_agdev)),
         z_log_prop_ag = as.numeric(scale(log_prop_ag)),
         z_log_prop_dev = as.numeric(scale(log_prop_dev)),
         z_log_prop_agdev = as.numeric(scale(log_prop_agdev)))

Create tibble of means and SDs

Code
scltib <- tibble(param = c("logarea", "loggwi", "dist", "barrier_dens", "prop_ag", "prop_dev", "prop_agdev"), 
                 mean = c(attributes(scale(gwmet$logarea))$`scaled:center`, attributes(scale(gwmet$loggwi))$`scaled:center`, attributes(scale(dist$distkm))$`scaled:center`, attributes(scale(barr$barrier_dens))$`scaled:center`, attributes(scale(land$prop_ag))$`scaled:center`, attributes(scale(land$prop_dev))$`scaled:center`, attributes(scale(land$prop_agdev))$`scaled:center`),  
                 sd = c(attributes(scale(gwmet$logarea))$`scaled:scale`, attributes(scale(gwmet$loggwi))$`scaled:scale`, attributes(scale(dist$distkm))$`scaled:scale`, attributes(scale(barr$barrier_dens))$`scaled:scale`, attributes(scale(land$prop_ag))$`scaled:scale`, attributes(scale(land$prop_dev))$`scaled:scale`, attributes(scale(land$prop_agdev))$`scaled:scale`))
write_csv(scltib, "/Users/jeffbaldock/Library/CloudStorage/GoogleDrive-jbaldock@uwyo.edu/Shared drives/wyo-coop-baldock/UWyoming/Snake River Cutthroat/Analyses/Snake River GSI Quarto/Obj2 Model Contribution/GSI_Covariates_MeansStDevs.csv")

10.1.3 Create long data

Code
data_long <- gsi_dat$bootstrapped_proportions %>%  
  left_join(gsi_dat$indiv_posteriors %>% group_by(indiv, mixture_collection) %>% summarize(n = n()) %>% group_by(mixture_collection) %>% summarize(sampsize = n())) %>% 
  mutate(bscorrnum = round(bs_corrected_repunit_ppn*sampsize, digits = 0),
         section = str_sub(mixture_collection, end = -6), 
         year = str_sub(mixture_collection, start = -4)) %>% 
  left_join(gwmet) %>% left_join(dist) %>% left_join(barr %>% select(repunit, barrier_dens, log_barrier_dens, connectivity, z_barrier_dens, z_log_barrier_dens, z_barrier_num, connect_type)) %>% left_join(land)
head(data_long)
# A tibble: 6 × 35
  mixture_collection   repunit bs_corrected_repunit…¹ sampsize bscorrnum section
  <chr>                <chr>                    <dbl>    <int>     <dbl> <chr>  
1 snake_wilsonsouthpa… bailey…             0               131         0 snake_…
2 snake_wilsonsouthpa… blackr…             0.00000215      131         0 snake_…
3 snake_wilsonsouthpa… blackr…             0.0000550       131         0 snake_…
4 snake_wilsonsouthpa… blackt…             0.00600         131         1 snake_…
5 snake_wilsonsouthpa… blindb…             0.0000448       131         0 snake_…
6 snake_wilsonsouthpa… cody_b…             0.0000801       131         0 snake_…
# ℹ abbreviated name: ¹​bs_corrected_repunit_ppn
# ℹ 29 more variables: year <chr>, areasqkm <dbl>, gwi_point <dbl>,
#   gwi_iew05km <dbl>, logarea <dbl>, loggwi <dbl>, z_logarea <dbl>,
#   z_loggwi <dbl>, distkm <dbl>, z_dist <dbl>, barrier_dens <dbl>,
#   log_barrier_dens <dbl>, connectivity <dbl>, z_barrier_dens <dbl>,
#   z_log_barrier_dens <dbl>, z_barrier_num <dbl>, connect_type <fct>,
#   prop_ag <dbl>, prop_dev <dbl>, prop_agdev <dbl>, log_prop_ag <dbl>, …
Code
write_csv(data_long, "/Users/jeffbaldock/Library/CloudStorage/GoogleDrive-jbaldock@uwyo.edu/Shared drives/wyo-coop-baldock/UWyoming/Snake River Cutthroat/Analyses/Snake River GSI Quarto/Obj2 Model Contribution/Contribution_data_long.csv")

10.2 Visualize data

10.2.1 Covariate collinearity

How correlated are the covariates?

Code
ggpairs(data_long %>% select(z_dist, z_logarea, z_loggwi, connect_type))

10.2.2 Contribution-covariate relationships

Plot proportional contribution against covariates

Code
p1 <- data_long %>% ggplot(aes(x = z_dist, y = bs_corrected_repunit_ppn)) + geom_point() + geom_smooth()
p2 <- data_long %>% ggplot(aes(x = z_logarea, y = bs_corrected_repunit_ppn)) + geom_point() + geom_smooth()
p3 <- data_long %>% ggplot(aes(x = z_loggwi, y = bs_corrected_repunit_ppn)) + geom_point() + geom_smooth()
p4 <- data_long %>% ggplot(aes(x = connect_type, y = bs_corrected_repunit_ppn)) + geom_boxplot() + geom_smooth()

ggarrange(p1, p2, p3, p4, nrow = 2, ncol = 2)

10.2.3 Connectivity sample size

How many streams and observations do we have for each type of connectivity? Coverage for types “culvert_diversion” and “waterfall” is low at the scale of individual streams. But replication across sections and years helps with that.

Code
barr %>% group_by(connect_type) %>% summarize(n.streams = n()) %>% left_join(data_long %>% group_by(connect_type) %>% summarize(n.obs = n()))
# A tibble: 4 × 3
  connect_type      n.streams n.obs
  <fct>                 <int> <int>
1 connected                39   663
2 culvert_diversion         3    51
3 low flow                  7   119
4 waterfall                 3    51

10.3 ZOIB

Fit a zero/one inflated beta regression model to estimate the effects of tributary/reporting group covariates on proportional contribution to the mainstem Snake River. Preliminary work tested multivariate model structures, such as Dirichlet and Dirichlet-multinomial (DM) models sensu Chong and Spencer (2018), Harrison et al. (2020), and Averill et al. (2021). DM models are well-suited for modeling compositional data as they account for covariance among group-level proportions due to the sum-to-1 constraint. However, covariate extensions of DM models treat each composition as a single observational unit and apply the covariate data to the composition as a whole, rather than to each of the component groups (reporting groups or tributaries), as is the goal of this study. Preliminary DM modelling indicated that violations to this covariate data structure led to highly overparameterized model fits.

As recommended in Douma and Weedon (2019), we therefore use beta regression to quantify the effects of tributary covariates on proportional contribution. Beta regression is well-suited for modeling continuous proportions (as is the output of the rubias) and can be extended to account for excess zeros (zero inflation) and variance/precision heterogeneity (i.e., variable phi). While beta regression is designed for analyzing proportional data stemming from just 2 categories, multivariate extensions (i.e., Dirichlet regression) are not appropriate for our purposes given the constraints mentioned above (covariates are applied to the entire composition rather than each group individually). As a result, beta regression does not ensure the sum-to-1 constraint, however, we believe a change in one category will not necessarily lead to a change in all other categories due to the sheer number of categories in our study (i.e., reporting groups, n = 52). Furthermore, we are not specifically interested in making accurate predictions, but rather characterizing general patterns, and therefore believe the constraints of multivariate/compositional data can be relaxed. One could use a random effects structure to account for grouping of data into sections/years. But random intercepts do not make sense for proportional data (e.g., variation in section-specific mean contribution - randome intercept - would be driven by among-group heterogeneity in intercepts, not greater contribution across all groups within that section, because the proportions sum to 1). And random slopes add excess complexity to the model that we cannot justify. Preliminary models fit with both random slopes and intercepts had difficulty converging.

10.3.1 Fit model

Fit model using brms

Code
data_long <- data_long %>% mutate(repunit = as.factor(repunit), section = as.factor(section), year = as.factor(year))

# full model
mod_zoib1 <- brm(brmsformula(
  bs_corrected_repunit_ppn ~ (1 + z_loggwi | section) + (1|repunit)  + z_dist + z_logarea + z_loggwi + connect_type, 
  phi ~ z_dist + z_logarea + z_loggwi + connect_type, 
  zi ~ z_dist + z_logarea + z_loggwi + connect_type, 
  family = zero_inflated_beta()), data = data_long, iter = 4000)

# model with no zero inflation or variable phi...how much variation do just the covariates explain?
# mod_zoib0 <- brm(brmsformula(
#   bs_corrected_repunit_ppn ~ z_dist + z_logarea + z_loggwi + connect_type, 
#   phi ~ 1, 
#   zi ~ 1, 
#   family = zero_inflated_beta()), data = data_long, iter = 4000)

# write model objects to file 
saveRDS(mod_zoib1, "/Users/jeffbaldock/Library/CloudStorage/GoogleDrive-jbaldock@uwyo.edu/Shared drives/wyo-coop-baldock/UWyoming/Snake River Cutthroat/Analyses/Snake River GSI Quarto/Obj2 Model Contribution/brms_fit_zoib_fullmodel.rds")
# saveRDS(mod_zoib0, "/Users/jeffbaldock/Library/CloudStorage/GoogleDrive-jbaldock@uwyo.edu/Shared drives/wyo-coop-baldock/UWyoming/Snake River Cutthroat/Analyses/Snake River GSI Quarto/Obj 2 Model Contribution/bbrms_fit_zoib_fixefonly.rds")

Load fitted model object (to reduce rendering time)

Code
mod_zoib1 <- readRDS("/Users/jeffbaldock/Library/CloudStorage/GoogleDrive-jbaldock@uwyo.edu/Shared drives/wyo-coop-baldock/UWyoming/Snake River Cutthroat/Analyses/Snake River GSI Quarto/Obj2 Model Contribution/brms_fit_zoib_fullmodel.rds")

View model summary

Code
summary(mod_zoib1)
 Family: zero_inflated_beta 
  Links: mu = logit; phi = log; zi = logit 
Formula: bs_corrected_repunit_ppn ~ (1 + z_loggwi | section) + (1 | repunit) + z_dist + z_logarea + z_loggwi + connect_type 
         phi ~ z_dist + z_logarea + z_loggwi + connect_type
         zi ~ z_dist + z_logarea + z_loggwi + connect_type
   Data: data_long (Number of observations: 884) 
  Draws: 4 chains, each with iter = 4000; warmup = 2000; thin = 1;
         total post-warmup draws = 8000

Multilevel Hyperparameters:
~repunit (Number of levels: 52) 
              Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
sd(Intercept)     0.73      0.09     0.57     0.93 1.00     1878     3424

~section (Number of levels: 6) 
                        Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS
sd(Intercept)               0.31      0.15     0.12     0.71 1.00     2517
sd(z_loggwi)                0.40      0.19     0.17     0.91 1.00     2449
cor(Intercept,z_loggwi)    -0.51      0.37    -0.97     0.38 1.00     2304
                        Tail_ESS
sd(Intercept)               3048
sd(z_loggwi)                2110
cor(Intercept,z_loggwi)     3530

Regression Coefficients:
                                  Estimate Est.Error l-95% CI u-95% CI Rhat
Intercept                            -4.87      0.20    -5.26    -4.48 1.00
phi_Intercept                         4.28      0.09     4.09     4.46 1.00
zi_Intercept                         -0.65      0.09    -0.82    -0.48 1.00
z_dist                               -1.37      0.07    -1.50    -1.23 1.00
z_logarea                             0.76      0.13     0.50     1.01 1.00
z_loggwi                              0.50      0.22     0.07     0.95 1.00
connect_typeculvert_diversion        -1.70      0.55    -2.79    -0.65 1.00
connect_typelowflow                  -1.93      0.37    -2.66    -1.19 1.00
connect_typewaterfall                -2.75      0.51    -3.75    -1.72 1.00
phi_z_dist                            1.10      0.08     0.94     1.26 1.00
phi_z_logarea                        -0.60      0.10    -0.80    -0.41 1.00
phi_z_loggwi                         -0.47      0.09    -0.66    -0.29 1.00
phi_connect_typeculvert_diversion     1.41      0.39     0.59     2.11 1.00
phi_connect_typelowflow               1.91      0.25     1.41     2.38 1.00
phi_connect_typewaterfall             2.90      0.36     2.13     3.56 1.00
zi_z_dist                             0.41      0.08     0.25     0.57 1.00
zi_z_logarea                         -0.19      0.08    -0.35    -0.03 1.00
zi_z_loggwi                          -0.08      0.09    -0.25     0.09 1.00
zi_connect_typeculvert_diversion      0.11      0.31    -0.50     0.72 1.00
zi_connect_typelowflow               -0.38      0.24    -0.85     0.07 1.00
zi_connect_typewaterfall              0.60      0.31    -0.01     1.20 1.00
                                  Bulk_ESS Tail_ESS
Intercept                             1600     2451
phi_Intercept                         6139     4674
zi_Intercept                          9845     6148
z_dist                                5281     5440
z_logarea                             2111     3693
z_loggwi                              1601     2849
connect_typeculvert_diversion         3133     4425
connect_typelowflow                   1831     3653
connect_typewaterfall                 2921     4047
phi_z_dist                            4885     5899
phi_z_logarea                         5267     5663
phi_z_loggwi                          5798     6078
phi_connect_typeculvert_diversion     6561     5211
phi_connect_typelowflow               6239     5667
phi_connect_typewaterfall             7400     4889
zi_z_dist                             8342     5334
zi_z_logarea                          9439     6254
zi_z_loggwi                           7990     6383
zi_connect_typeculvert_diversion      8965     5769
zi_connect_typelowflow                8928     6320
zi_connect_typewaterfall              9186     5842

Draws were sampled using sampling(NUTS). For each parameter, Bulk_ESS
and Tail_ESS are effective sample size measures, and Rhat is the potential
scale reduction factor on split chains (at convergence, Rhat = 1).

Bayesian R-squared

Code
# full model
par(mar = c(4,4,1,1), mgp = c(2.5,1,0))
myr2 <- bayes_R2(mod_zoib1, summary = FALSE)
myr2hdi <- HDInterval::hdi(myr2, credMass = 0.95)
hist(myr2, xlab = "Bayesian R-squared", xlim = c(0,1), main = "", breaks = 20)
abline(v = median(myr2), lwd = 3, col = "darkred")
abline(v = unlist(myr2hdi), lwd = 1, lty = 2, col = "darkred")
legend("topleft", legend = c(paste("median = ", round(median(myr2), digits = 3), sep = ""), 
                              paste("95% HDI = ", round(myr2hdi[1], digits = 3), " - ", round(myr2hdi[2], digits = 3), sep = "")), 
       lty = c(1,2), lwd = c(3,1), col = "darkred", bty = "n")

Code
# full model
# myr2 <- bayes_R2(mod_zoib0, summary = FALSE)
# myr2hdi <- HDInterval::hdi(myr2, credMass = 0.95)
# hist(myr2, xlab = "Bayesian R-squared", xlim = c(0,1), main = "No zero-inflation, no variable phi")
# abline(v = median(myr2), lwd = 3, col = "darkred")
# abline(v = unlist(myr2hdi), lwd = 1, lty = 2, col = "darkred")
# legend("topright", legend = c(paste("median = ", round(median(myr2), digits = 3), sep = ""), 
#                               paste("95% HDI = ", round(myr2hdi[1], digits = 3), " - ", round(myr2hdi[2], digits = 3), sep = "")), 
#        lty = c(1,2), lwd = c(3,1), col = "darkred", bty = "n")

10.3.2 Convergence diagnostics

Any r-hat values >1.01?

Code
brms::rhat(mod_zoib1)[brms::rhat(mod_zoib1) >= 1.01]
named numeric(0)

Traceplots indicate model has converged

Code
plot(mod_zoib1, variable = "^b_", ask = FALSE, regex = TRUE)

LOO-CV indicates the model is well-specified given the data (with the exception of 1 outlier)

Code
myloo <- loo(mod_zoib1)
myloo

Computed from 8000 by 884 log-likelihood matrix.

         Estimate    SE
elpd_loo   2094.2  97.7
p_loo        65.6   6.5
looic     -4188.4 195.4
------
MCSE of elpd_loo is NA.
MCSE and ESS estimates assume MCMC draws (r_eff in [0.5, 2.0]).

Pareto k diagnostic values:
                         Count Pct.    Min. ESS
(-Inf, 0.7]   (good)     881   99.7%   755     
   (0.7, 1]   (bad)        2    0.2%   <NA>    
   (1, Inf)   (very bad)   1    0.1%   <NA>    
See help('pareto-k-diagnostic') for details.
Code
plot(myloo, label_points = TRUE)

10.3.3 PP-checks

Generate posterior predicted proportions and calculate median across iterations

Code
yrep <- brms::posterior_predict(mod_zoib1, ndraws = 8000)
yrep_sum <- apply(yrep, 2, median)
dim(yrep)
[1] 8000  884

Bayesian p-value (closer to 0.5 is better). Suggests slightly biased model predictions

Code
length(which(yrep_sum > data_long$bs_corrected_repunit_ppn)) / length(data_long$bs_corrected_repunit_ppn)
[1] 0.4355204

How well can we replicate the observed data? Generally, poor. This because the covariates explain a relatively small proportion of the variance in proportional contribution. The strong effects of covariates on precision provide additional evidence for proportional contributions being hard to predict with just the covariates used in this model.

Code
pp_check(mod_zoib1, type = "scatter_avg") + coord_flip()

Can we replicate the distribution of the observed data? (Yes)

Code
pp_check(mod_zoib1, type = "hist", ndraws = 10)

What is the distribution of residuals relative to 0? Are residuals biased in either direction? (No)

Code
pp_check(mod_zoib1, type = "error_hist", ndraws = 10) + geom_vline(xintercept = 0, linetype = "dashed") #+ xlim(-0.2, 0.2)

How well can we replicate other features of the observed data? Proportion of zeros? (Looks good)

Code
prop_zero <- function(x) mean(x == 0)
ppc_stat(y = data_long$bs_corrected_repunit_ppn, yrep = yrep, stat = "prop_zero")

Mean? (Somewhat under-predicts the mean of the data)

Code
ppc_stat(y = data_long$bs_corrected_repunit_ppn, yrep = yrep, stat = "mean")

10.4 Plot model output

Inverse logit function

Code
invlogit <- function(x) { exp(x) / (1 + exp(x)) }

List model variables

Code
get_variables(mod_zoib1)
  [1] "b_Intercept"                                  
  [2] "b_phi_Intercept"                              
  [3] "b_zi_Intercept"                               
  [4] "b_z_dist"                                     
  [5] "b_z_logarea"                                  
  [6] "b_z_loggwi"                                   
  [7] "b_connect_typeculvert_diversion"              
  [8] "b_connect_typelowflow"                        
  [9] "b_connect_typewaterfall"                      
 [10] "b_phi_z_dist"                                 
 [11] "b_phi_z_logarea"                              
 [12] "b_phi_z_loggwi"                               
 [13] "b_phi_connect_typeculvert_diversion"          
 [14] "b_phi_connect_typelowflow"                    
 [15] "b_phi_connect_typewaterfall"                  
 [16] "b_zi_z_dist"                                  
 [17] "b_zi_z_logarea"                               
 [18] "b_zi_z_loggwi"                                
 [19] "b_zi_connect_typeculvert_diversion"           
 [20] "b_zi_connect_typelowflow"                     
 [21] "b_zi_connect_typewaterfall"                   
 [22] "sd_repunit__Intercept"                        
 [23] "sd_section__Intercept"                        
 [24] "sd_section__z_loggwi"                         
 [25] "cor_section__Intercept__z_loggwi"             
 [26] "Intercept"                                    
 [27] "Intercept_phi"                                
 [28] "Intercept_zi"                                 
 [29] "r_repunit[bailey_NA,Intercept]"               
 [30] "r_repunit[blackrock_lower,Intercept]"         
 [31] "r_repunit[blackrock_upper,Intercept]"         
 [32] "r_repunit[blacktail_NA,Intercept]"            
 [33] "r_repunit[blindbull_NA,Intercept]"            
 [34] "r_repunit[boulder_NA,Intercept]"              
 [35] "r_repunit[box_NA,Intercept]"                  
 [36] "r_repunit[cabin_NA,Intercept]"                
 [37] "r_repunit[clear_NA,Intercept]"                
 [38] "r_repunit[cliff_NA,Intercept]"                
 [39] "r_repunit[cody_bluecrane,Intercept]"          
 [40] "r_repunit[cottonwood_grosventre,Intercept]"   
 [41] "r_repunit[cottonwood_nps,Intercept]"          
 [42] "r_repunit[cowboycabin_NA,Intercept]"          
 [43] "r_repunit[crystal_lower,Intercept]"           
 [44] "r_repunit[crystal_upper,Intercept]"           
 [45] "r_repunit[deadman_greys,Intercept]"           
 [46] "r_repunit[deadmansbar_NA,Intercept]"          
 [47] "r_repunit[dell_NA,Intercept]"                 
 [48] "r_repunit[ditch_NA,Intercept]"                
 [49] "r_repunit[dog_NA,Intercept]"                  
 [50] "r_repunit[fall_coburn,Intercept]"             
 [51] "r_repunit[fish_grosventre,Intercept]"         
 [52] "r_repunit[fish_NA,Intercept]"                 
 [53] "r_repunit[flat_NA,Intercept]"                 
 [54] "r_repunit[fordspring_NA,Intercept]"           
 [55] "r_repunit[goosewing_NA,Intercept]"            
 [56] "r_repunit[granite_lower,Intercept]"           
 [57] "r_repunit[granite_upper,Intercept]"           
 [58] "r_repunit[grosventre_lower,Intercept]"        
 [59] "r_repunit[hoback_upper,Intercept]"            
 [60] "r_repunit[horse_NA,Intercept]"                
 [61] "r_repunit[lava_NA,Intercept]"                 
 [62] "r_repunit[leidy_NA,Intercept]"                
 [63] "r_repunit[littlegreys_steer,Intercept]"       
 [64] "r_repunit[lowerbarbc_NA,Intercept]"           
 [65] "r_repunit[mosquito_NA,Intercept]"             
 [66] "r_repunit[northbuffalofork_NA,Intercept]"     
 [67] "r_repunit[pacific_NA,Intercept]"              
 [68] "r_repunit[rock_NA,Intercept]"                 
 [69] "r_repunit[shoal_NA,Intercept]"                
 [70] "r_repunit[slate_NA,Intercept]"                
 [71] "r_repunit[snakeriversidechannel_NA,Intercept]"
 [72] "r_repunit[spread_southfork,Intercept]"        
 [73] "r_repunit[spread_uppermainstem,Intercept]"    
 [74] "r_repunit[spreadnf_flagstaff,Intercept]"      
 [75] "r_repunit[spring_nps,Intercept]"              
 [76] "r_repunit[spring_tss,Intercept]"              
 [77] "r_repunit[threechannel_NA,Intercept]"         
 [78] "r_repunit[upperbarbc_NA,Intercept]"           
 [79] "r_repunit[white_NA,Intercept]"                
 [80] "r_repunit[willow_NA,Intercept]"               
 [81] "r_section[snake_astoriawesttable,Intercept]"  
 [82] "r_section[snake_deadmansmoose,Intercept]"     
 [83] "r_section[snake_moosewilson,Intercept]"       
 [84] "r_section[snake_pacificdeadmans,Intercept]"   
 [85] "r_section[snake_southparkastoria,Intercept]"  
 [86] "r_section[snake_wilsonsouthpark,Intercept]"   
 [87] "r_section[snake_astoriawesttable,z_loggwi]"   
 [88] "r_section[snake_deadmansmoose,z_loggwi]"      
 [89] "r_section[snake_moosewilson,z_loggwi]"        
 [90] "r_section[snake_pacificdeadmans,z_loggwi]"    
 [91] "r_section[snake_southparkastoria,z_loggwi]"   
 [92] "r_section[snake_wilsonsouthpark,z_loggwi]"    
 [93] "lprior"                                       
 [94] "lp__"                                         
 [95] "accept_stat__"                                
 [96] "stepsize__"                                   
 [97] "treedepth__"                                  
 [98] "n_leapfrog__"                                 
 [99] "divergent__"                                  
[100] "energy__"                                     

10.4.1 Primary effects

10.4.1.1 Generic plots

Model output indicates a negative effect of tributary-mainstem distance and positive effects of tributary catchment size and groundwater on proportional contribution of tributaries to the mainstem Snake River. Additionally, connected tributaries contribute considerably more than tributaries with disrupted connectivity.

Code
plot(conditional_effects(mod_zoib1), ask = FALSE, rug = TRUE)

View posterior distributions for continuous covariates to compare relative effect sizes: effect of distance is strongest, followed by area and finally groundwater.

Code
mod_zoib1 %>%
  spread_draws(b_z_dist, b_z_logarea, b_z_loggwi) %>%
  gather(b_z_dist:b_z_loggwi, key = "param", value = "estimate") %>%
  mutate(param = factor(recode(param, "b_z_dist" = "Distance", "b_z_logarea" = "Area", "b_z_loggwi" = "Groundwater"), levels = c("Groundwater", "Area", "Distance"))) %>% 
  ggplot(aes(y = param, x = estimate)) +
  stat_halfeye() + 
  geom_vline(xintercept = 0, linetype = "dashed") + #xlim(-0.3,1.5) +
  xlab("Posterion estimate") + ylab("Parameter") + theme_bw()

Section-specific effects of groundwater on proportional contribution: effect of groundwater is much stronger in upstream sections, exceeding the magnitude of the area effect in section A.

Code
mod_zoib1 %>%
  spread_draws(b_z_loggwi, r_section[section,z_loggwi]) %>%
  filter(z_loggwi == "z_loggwi") %>% 
  mutate(section_mean_loggwi = b_z_loggwi + r_section,
         section = factor(recode(section, 
                            "snake_pacificdeadmans" = "A",
                            "snake_deadmansmoose" = "B",
                            "snake_moosewilson" = "C",
                            "snake_wilsonsouthpark" = "D",
                            "snake_southparkastoria" = "E",
                            "snake_astoriawesttable" = "F"), levels = c("F", "E", "D", "C", "B", "A"))) %>%
  ggplot(aes(y = section, x = section_mean_loggwi)) +
  stat_halfeye() + 
  geom_vline(xintercept = 0, linetype = "dashed") + xlim(-0.3,1.5) +
  xlab("Section-specific effect of groundwater on proportional contribution") + ylab("Mainstem Snake River section") + theme_bw()

10.4.1.2 Pretty plots

Connectivity
Code
mydraws <- mod_zoib1 %>% 
  spread_draws(b_Intercept, b_connect_typeculvert_diversion, b_connect_typelowflow, b_connect_typewaterfall) %>%
  mutate(beta_connected = invlogit(b_Intercept),
         beta_culvdiv = invlogit(b_Intercept + b_connect_typeculvert_diversion),
         beta_lowflow = invlogit(b_Intercept + b_connect_typelowflow),
         beta_waterfall = invlogit(b_Intercept + b_connect_typewaterfall)) %>%
  select(-c(b_Intercept, b_connect_typeculvert_diversion, b_connect_typelowflow, b_connect_typewaterfall)) %>%
  summarise_draws()

Marginal effect of connectivity on proportional contribution of tributary streams to the mainstem Snake River population of YCT.
Distance

Connected only

Marginal effect of flowline distance (km) on proportional contribution of tributary streams to the mainstem Snake River population of YCT.

All connect types

Additive effects of flowline distance (km) and connectivity on proportional contribution of tributary streams to the mainstem Snake River population of YCT.
Area

Connected only

Marginal effect of (log) catchment area (km^2) on proportional contribution of tributary streams to the mainstem Snake River population of YCT.

All connect types

Additive effects of (log) catchment area (km^2) and connectivity on proportional contribution of tributary streams to the mainstem Snake River population of YCT.
Groundwater

Connected only

Marginal effect of (log) groundwater availability on proportional contribution of tributary streams to the mainstem Snake River population of YCT.

All connect types

Additive effects of (log) groundwater availability and connectivity on proportional contribution of tributary streams to the mainstem Snake River population of YCT.

Marginal effects of tributary (a) connectivity, (b) distance, (c) area, and (d) groundwater availavility on proportional contribution

10.4.2 Random effects

Random effect of reporting unit, offset to the global intercept. Random intercepts indicate reporting units that contribute more or less than expected, after account for the effects of distance, area, and groundwater (and zero-inflation and precision components of the model).

Code
# define spatial clusters/regions
greys <- c("littlegreys_steer", "white_NA", "blindbull_NA", "deadman_greys")
hoback <- c("willow_NA", "boulder_NA", "granite_lower", "granite_upper", "shoal_NA", "cliff_NA", "dell_NA", "hoback_upper")
grosventre <- c("crystal_lower", "crystal_upper", "slate_NA", "goosewing_NA", "cottonwood_grosventre", "fish_grosventre")
spread <- c("rock_NA", "spread_uppermainstem", "spreadnf_flagstaff", "spread_southfork", "leidy_NA")
buffalo <- c("blackrock_lower", "blackrock_upper", "lava_NA", "box_NA", "clear_NA", "northbuffalofork_NA", "pacific_NA", "spring_nps")

# load loation data and edit names, regions
gps <- read_csv("/Users/jeffbaldock/Library/CloudStorage/GoogleDrive-jbaldock@uwyo.edu/Shared drives/wyo-coop-baldock/UWyoming/Snake River Cutthroat/Analyses/Snake River GSI Quarto/Landscape Covariates/Location Data/RepUnit_LatLong.csv") %>% 
  arrange(desc(lat)) %>%
  mutate(repunit_num = c(1:52)) %>%
  mutate(repunit2 = gsub("_NA", "", repunit)) %>%
  mutate(repunit2 = recode(repunit2, "snakeriversidechannel" = "srsidechannel", "spread_uppermainstem" = "spread_mainstem", "cottonwood_grosventre" = "cottonwood_gv")) %>%
  mutate(name = paste(repunit_num, ". ", repunit2, sep = ""),
         region = ifelse(repunit %in% greys, "Greys",
                         ifelse(repunit %in% hoback, "Hoback",
                                ifelse(repunit %in% grosventre, "Gros Ventre",
                                       ifelse(repunit %in% spread, "Spread",
                                              ifelse(repunit %in% buffalo, "Buffalo Fork", "SR Corridor"))))))
gps
# A tibble: 52 × 7
   repunit                lat   lon repunit_num repunit2           name   region
   <chr>                <dbl> <dbl>       <int> <chr>              <chr>  <chr> 
 1 northbuffalofork_NA   43.9 -110.           1 northbuffalofork   1. no… Buffa…
 2 box_NA                43.9 -110.           2 box                2. box Buffa…
 3 spring_nps            43.9 -111.           3 spring_nps         3. sp… Buffa…
 4 pacific_NA            43.9 -111.           4 pacific            4. pa… Buffa…
 5 clear_NA              43.9 -110.           5 clear              5. cl… Buffa…
 6 lava_NA               43.9 -110.           6 lava               6. la… Buffa…
 7 blackrock_lower       43.8 -110.           7 blackrock_lower    7. bl… Buffa…
 8 blackrock_upper       43.8 -110.           8 blackrock_upper    8. bl… Buffa…
 9 spread_uppermainstem  43.8 -110.           9 spread_mainstem    9. sp… Spread
10 spreadnf_flagstaff    43.8 -110.          10 spreadnf_flagstaff 10. s… Spread
# ℹ 42 more rows
Code
p1 <- mod_zoib1 %>%
  spread_draws(r_repunit[repunit,]) %>%
  #compare_levels(r_repunit, by = repunit) %>%
  ungroup() %>%
  left_join(gps %>% select(repunit, name, lat, region)) %>%
  mutate(name = reorder(name, r_repunit)) %>%
  ggplot(aes(y = name, x = r_repunit)) +
  stat_pointinterval(.width = c(.80, .95)) +
  geom_vline(xintercept = 0, linetype = "dashed") +
  ylab("Reporting unit") + xlab("Offset to intercept") + 
  #scale_colour_brewer(palette = "Dark2") +
  theme_bw()
print(p1)

Code
# flowline
flowline <- vect("/Users/jeffbaldock/Library/CloudStorage/GoogleDrive-jbaldock@uwyo.edu/Shared drives/wyo-coop-baldock/UWyoming/Snake River Cutthroat/Data/Spatial/Flowline/FlowlineEditing/SnakeHeadwaters_flowline_springsclean.shp")

# reporting unit locations
sites_repu <- vect("/Users/jeffbaldock/Library/CloudStorage/GoogleDrive-jbaldock@uwyo.edu/Shared drives/wyo-coop-baldock/UWyoming/Snake River Cutthroat/Analyses/Snake River GSI Quarto/Landscape Covariates/Location Data/RepUnit_SpatialLocations.shp")
sites_repu <- sort(sites_repu, "repunit")

# summarize reporting unit random effects
rpint_summ <- mod_zoib1 %>%
  spread_draws(r_repunit[repunit,]) %>%
  group_by(repunit) %>%
  median_qi(r_repunit) %>%
  ungroup() %>%
  mutate(repunit = reorder(repunit, r_repunit))
rpint_summ
# A tibble: 52 × 7
   repunit         r_repunit .lower  .upper .width .point .interval
   <fct>               <dbl>  <dbl>   <dbl>  <dbl> <chr>  <chr>    
 1 bailey_NA          0.898   0.253  1.50     0.95 median qi       
 2 blackrock_lower   -0.598  -1.26  -0.0129   0.95 median qi       
 3 blackrock_upper   -0.635  -1.20  -0.110    0.95 median qi       
 4 blacktail_NA       0.201  -0.452  0.810    0.95 median qi       
 5 blindbull_NA       0.403  -0.280  1.05     0.95 median qi       
 6 boulder_NA         0.800   0.187  1.32     0.95 median qi       
 7 box_NA             0.450  -0.344  1.24     0.95 median qi       
 8 cabin_NA           0.358  -0.240  0.894    0.95 median qi       
 9 clear_NA          -0.0613 -0.791  0.595    0.95 median qi       
10 cliff_NA           0.627   0.109  1.12     0.95 median qi       
# ℹ 42 more rows
Code
sites_repu$repunit == rpint_summ$repunit
 [1] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
[16] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
[31] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
[46] TRUE TRUE TRUE TRUE TRUE TRUE TRUE
Code
tibble(x = sites_repu$repunit, y = rpint_summ$repunit)
# A tibble: 52 × 2
   x               y              
   <chr>           <fct>          
 1 bailey_NA       bailey_NA      
 2 blackrock_lower blackrock_lower
 3 blackrock_upper blackrock_upper
 4 blacktail_NA    blacktail_NA   
 5 blindbull_NA    blindbull_NA   
 6 boulder_NA      boulder_NA     
 7 box_NA          box_NA         
 8 cabin_NA        cabin_NA       
 9 clear_NA        clear_NA       
10 cliff_NA        cliff_NA       
# ℹ 42 more rows
Code
# add to vector
sites_repu$low <- rpint_summ$.lower[rpint_summ$repunit == sites_repu$repunit]
sites_repu$upp <- rpint_summ$.upper[rpint_summ$repunit == sites_repu$repunit]
sites_repu$med <- rpint_summ$r_repunit[rpint_summ$repunit == sites_repu$repunit]

# map it
mapview(flowline) + mapview(sites_repu, zcol = "med", col.regions = rev(hcl.colors(10, "Blue-Red")), alpha.regions = 1)

Random effect of section of the mainstem Snake River, offset to the global intercept. (These really don’t have any functional meaning, because proportions within sections must add to 1, so no section can have a larger or smaller average contribution. I included random intercepts following inclusion of random slopes for groundwater across sections, and including random slopes but not intercepts is generally not recommended.)

Code
p1 <- mod_zoib1 %>%
  spread_draws(r_section[section,]) %>%
  ungroup() %>%
  mutate(section = reorder(section, r_section),
         section = factor(recode(section, 
                            "snake_pacificdeadmans" = "A",
                            "snake_deadmansmoose" = "B",
                            "snake_moosewilson" = "C",
                            "snake_wilsonsouthpark" = "D",
                            "snake_southparkastoria" = "E",
                            "snake_astoriawesttable" = "F"), levels = c("F", "E", "D", "C", "B", "A"))) %>%
  ggplot(aes(y = section, x = r_section)) +
  stat_pointinterval(.width = c(.80, .95)) +
  geom_vline(xintercept = 0, linetype = "dashed") +
  ylab("Section") + xlab("Offset to intercept")
print(p1)

10.4.3 Zero-inflation

Inverse logit function

Code
invlogit <- function(x) { exp(x) / (1 + exp(x)) }

Plot zero-inflation effects: increasing distance increases the probability of observing 0 contribution, whereas increase catchment area and groundwater contribution decreases the probability of observing a 0. Further, probability of observing a 0 is greatest for tributaries in which connectivity is disrupted by a waterfall, intermediate for connected tributaries and those in which connectivity is disrupted by culverts/diversions, and lowest for tributaries in which connectivity is disrupted by low flow.

Code
plot(conditional_effects(mod_zoib1, dpar = "zi"), ask = FALSE, rug = TRUE)

How different is the probability of observing 0 for disconnected tributaries as compared to connected tributaries? 95% credible intervals all overlap 0. For type = “waterfall”, 90% CI does not overlap 0.

Code
mod_zoib1 %>%
  spread_draws(b_zi_Intercept, b_zi_connect_typeculvert_diversion, b_zi_connect_typelowflow, b_zi_connect_typewaterfall) %>%
  mutate(connected = invlogit(b_zi_Intercept),
         culvert_diversion = invlogit(b_zi_Intercept + b_zi_connect_typeculvert_diversion),
         lowflow = invlogit(b_zi_Intercept + b_zi_connect_typelowflow),
         waterfall = invlogit(b_zi_Intercept + b_zi_connect_typewaterfall)) %>%
  mutate(conn_culvert_diversion = culvert_diversion - connected,
         conn_lowflow = lowflow - connected,
         conn_waterfall = waterfall - connected) %>%
  gather(conn_culvert_diversion:conn_waterfall, key = "param", value = "estimate") %>%
  select(param, estimate) %>%
  group_by(param) %>%
  median_qi(.width = c(0.95, 0.9))
# A tibble: 6 × 7
  param                  estimate   .lower    .upper .width .point .interval
  <chr>                     <dbl>    <dbl>     <dbl>  <dbl> <chr>  <chr>    
1 conn_culvert_diversion   0.0256 -0.104    0.173      0.95 median qi       
2 conn_lowflow            -0.0790 -0.165    0.0155     0.95 median qi       
3 conn_waterfall           0.145  -0.00203  0.291      0.95 median qi       
4 conn_culvert_diversion   0.0256 -0.0858   0.150      0.9  median qi       
5 conn_lowflow            -0.0790 -0.153   -0.000606   0.9  median qi       
6 conn_waterfall           0.145   0.0232   0.268      0.9  median qi       
Code
mod_zoib1 %>%
  spread_draws(b_zi_Intercept, b_zi_connect_typeculvert_diversion, b_zi_connect_typelowflow, b_zi_connect_typewaterfall) %>%
  mutate(connected = invlogit(b_zi_Intercept),
         culvert_diversion = invlogit(b_zi_Intercept + b_zi_connect_typeculvert_diversion),
         lowflow = invlogit(b_zi_Intercept + b_zi_connect_typelowflow),
         waterfall = invlogit(b_zi_Intercept + b_zi_connect_typewaterfall)) %>%
  mutate(conn_culvert_diversion = culvert_diversion - connected,
         conn_lowflow = lowflow - connected,
         conn_waterfall = waterfall - connected) %>%
  gather(conn_culvert_diversion:conn_waterfall, key = "param", value = "estimate") %>%
  ggplot(aes(y = param, x = estimate)) +
  stat_halfeye() + xlab("Probability of observing 0 contribution, difference from type = 'connected' ") + ylab("") + 
  geom_vline(xintercept = 0, linetype = "dashed") +
  theme_bw()

10.4.3.1 Pretty plots

Connectivity
Code
mydraws <- mod_zoib1 %>% 
  spread_draws(b_zi_Intercept, b_zi_connect_typeculvert_diversion, b_zi_connect_typelowflow, b_zi_connect_typewaterfall) %>%
  mutate(beta_connected = invlogit(b_zi_Intercept),
         beta_culvdiv = invlogit(b_zi_Intercept + b_zi_connect_typeculvert_diversion),
         beta_lowflow = invlogit(b_zi_Intercept + b_zi_connect_typelowflow),
         beta_waterfall = invlogit(b_zi_Intercept + b_zi_connect_typewaterfall)) %>%
  select(-c(b_zi_Intercept, b_zi_connect_typeculvert_diversion, b_zi_connect_typelowflow, b_zi_connect_typewaterfall)) %>%
  summarise_draws()

Marginal effect of connectivity on proportional contribution of tributary streams to the mainstem Snake River population of YCT.
Distance

Connected only

Marginal effect of flowline distance (km) on proportional contribution of tributary streams to the mainstem Snake River population of YCT.
Area

Marginal effect of (log) catchment area (km^2) on proportional contribution of tributary streams to the mainstem Snake River population of YCT.
Groundwater

Connected only

Marginal effect of (log) groundwater availability on proportional contribution of tributary streams to the mainstem Snake River population of YCT.

10.4.4 Phi (precision)

Precision in model estimates increases with increasing distance and decreases with increasing catchment area and groundwater. Generally, covariate effects on precision are opposite that for proportional contributions, reflecting commonly observed pattern of greater variance for larger numbers. Additionally, limitations on dispersal distance may yield more predictable contributions from tributaries very far away, whereas factors not included in our model may lead to less precise estimates for tributaries located near the mainstem. Contributions from large catchments may be less precise due to additional sources of habitat heterogeneity that may affect total contributions from these areas. And contributions from groundwater-dominated tributaries may be less precise due factors (such as spawning gravel availability) that can limit spawning success and recruitment and vary considerbly among these stream types. Increased precision for disconnected connectivity types is not surprising (if fish can’t get through then they can’t get through) and illustrates the power of genetic monitoring tools like GSI for evaluating the effects of barriers on functional connectivity between tributaries and mainstem habitats.

Code
plot(conditional_effects(mod_zoib1, dpar = "phi"), ask = FALSE, rug = TRUE)

10.4.4.1 Pretty plots

Connectivity
Code
mydraws <- mod_zoib1 %>% 
  spread_draws(b_phi_Intercept, b_phi_connect_typeculvert_diversion, b_phi_connect_typelowflow, b_phi_connect_typewaterfall) %>%
  mutate(beta_connected = exp(b_phi_Intercept),
         beta_culvdiv = exp(b_phi_Intercept + b_phi_connect_typeculvert_diversion),
         beta_lowflow = exp(b_phi_Intercept + b_phi_connect_typelowflow),
         beta_waterfall = exp(b_phi_Intercept + b_phi_connect_typewaterfall)) %>%
  select(-c(b_phi_Intercept, b_phi_connect_typeculvert_diversion, b_phi_connect_typelowflow, b_phi_connect_typewaterfall)) %>%
  summarise_draws()

Marginal effect of connectivity on proportional contribution of tributary streams to the mainstem Snake River population of YCT.
Distance

Connected only

Marginal effect of flowline distance (km) on proportional contribution of tributary streams to the mainstem Snake River population of YCT.
Area

Marginal effect of (log) catchment area (km^2) on proportional contribution of tributary streams to the mainstem Snake River population of YCT.
Groundwater

Connected only

Marginal effect of (log) groundwater availability on proportional contribution of tributary streams to the mainstem Snake River population of YCT.